package com.music.download.crawler;

import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.hutool.core.lang.Dict;
import com.google.common.collect.Lists;
import org.jsoup.select.Elements;

import java.util.List;
import java.util.stream.Collectors;

/**
 * Crawler that extracts song entries (id and name) from the anchors found under
 * {@code ul.f-hide > li} on a visited page.
 */
public class CrawlerMusic extends BaseCrawler {

    /** Songs extracted by the most recent {@link #visit(Page, CrawlDatums)} call; empty until then. */
    private List<Dict> musicList = Lists.newArrayList();

    /**
     * Creates the crawler and registers the given URL as a seed.
     *
     * @param crawlPath forwarded unchanged to the {@code BaseCrawler} constructor
     * @param autoParse forwarded unchanged to the {@code BaseCrawler} constructor
     * @param url       seed URL registered through {@code addSeed}
     */
    public CrawlerMusic(String crawlPath, boolean autoParse, String url) {
        super(crawlPath, autoParse);
        this.addSeed(url);
    }

    /**
     * Extracts one {@link Dict} per song anchor on the page, with the keys {@code "id"}
     * (the href segment following the first {@code '='}) and {@code "name"} (the anchor
     * text), and replaces the stored list with the result.
     *
     * <p>Anchors whose href carries no id are skipped rather than aborting the whole page.
     *
     * @param page the fetched page to parse
     * @param next follow-up crawl targets; not used by this crawler
     */
    @Override
    public void visit(Page page, CrawlDatums next) {
        Elements elements = page.select("ul[class=\"f-hide\"] > li >a");
        musicList = elements.stream()
            // Drop anchors without an id so one malformed link cannot fail the whole page.
            .filter(element -> extractId(element.attr("href")) != null)
            .map(element -> Dict.create()
                .set("id", extractId(element.attr("href")))
                .set("name", element.text()))
            .collect(Collectors.toList());
    }

    /**
     * Returns the songs collected by the most recent visit.
     *
     * @return the internal list itself (not a copy); empty if no page has been visited yet
     */
    public List<Dict> getMusicList() {
        return musicList;
    }

    /**
     * Returns the href segment between the first and second {@code '='}, or {@code null}
     * when the href has no such segment (no {@code '='}, or nothing after it).
     */
    private static String extractId(String href) {
        // split() discards trailing empty strings, so "x?id=" yields a single-element array.
        String[] parts = href.split("=");
        return parts.length > 1 ? parts[1] : null;
    }
}
